# Replication archive for 
# Coppock, Alexander, Donald P. Green, and Ethan Porter. 2022. 
# Does Digital Advertising Affect Vote Choice? Evidence from a Randomized Field Experiment 
# Research & Politics. 

rm(list = ls())
# setwd() to replication archive

library(DeclareDesign)
library(tidyverse)

precinct_level <- read_rds("CGP_2022_precinct_level.rds")
zip_code_level <- read_rds("CGP_2022_zip_code_level.rds")

fl_blocked_zips <-
  zip_code_level %>% 
  transmute(vb_tsmart_zip = vb_tsmart_zip, block_id)

# post-hager priors 
# rnorm(n = 1, mean = 0.010, sd = 0.005)

fl_random_assignment_function <-
  function(data) {
    new_randomization <-
      fl_blocked_zips %>%
      mutate(
        Zsim = block_ra(block_id),
        treatment = as.character(block_ra(
          Zsim,
          conditions = c("video_1", "video_2")
        )),
        treatment = ifelse(Zsim == 1, treatment, "control")
      )
    left_join(data, select(new_randomization, vb_tsmart_zip, Zsim), by = "vb_tsmart_zip")
  }



# Design declaration ------------------------------------------------

design <-
  declare_model(data = precinct_level) +
  declare_model(
    potential_outcomes(
      Y ~ rnorm(n = 1, mean = 0.010, sd = 0.005) * Zsim + dem_two_party_share_2018,
      conditions = list(Zsim = c(0, 1)
      )
    )) +
  declare_assignment(handler = fl_random_assignment_function) +
  declare_measurement(Y = reveal_outcomes(Y ~ Zsim)) +
  declare_estimator(
    Y ~ Zsim +
      dem_two_party_vote_margin_2016_nona + vote_2016_missing +
      dem_two_party_vote_margin_2014_nona + vote_2014_missing +
      dem_two_party_vote_margin_2012_nona + vote_2012_missing,
    clusters = vb_tsmart_zip
  )


# Simulation --------------------------------------------------------

# simulations <- simulate_design(design, sims = 5000)
simulations <- simulate_design(design, sims = 500)

# Using each simulated estimate to generate posteriors --------------

simulations <-
  simulations %>%
  mutate(
    prior_estimate = 0.010,
    prior_std.error = 0.005,
    prior_weight = 1 / (prior_std.error ^ 2),
    
    data_estimate = estimate,
    data_std.error = std.error,
    data_conf.low = data_estimate - 1.96*data_std.error,
    data_conf.high = data_estimate + 1.96*data_std.error,
    data_weight = 1 / (data_std.error ^ 2),
    data_p.upper = pnorm(statistic, lower.tail = FALSE),
    data_p.twotailed = pnorm(abs(statistic), lower.tail = FALSE) * 2,
    data_significant = as.numeric(data_p.upper <= 0.05),
    
    posterior_estimate =
      (data_estimate * data_weight + prior_estimate * prior_weight) /
      (data_weight + prior_weight),
    posterior_std.error = sqrt(1 / (data_weight + prior_weight)),
    posterior_conf.low = posterior_estimate - 1.96*posterior_std.error,
    posterior_conf.high = posterior_estimate + 1.96*posterior_std.error,
    
    posterior_statistic = posterior_estimate / posterior_std.error,
    posterior_p.upper = pnorm(posterior_statistic, lower.tail = FALSE),
    posterior_p.twotailed = pnorm(abs(posterior_statistic), lower.tail = FALSE) * 2,
    posterior_significant = as.numeric(posterior_p.upper <= 0.05),
  )


# calculate diagnosands ---------------------------------------------


diagnosands <-
  simulations %>%
  summarise(
    mean_data_estimate = mean(data_estimate),
    true_data_std.error = sd(data_estimate),
    mean_posterior_estimate = mean(posterior_estimate),
    true_posterior_std.error = sd(posterior_estimate),
    power_data_upper = mean(data_p.upper <= 0.05),
    power_data_two_tailed = mean(data_p.twotailed <= 0.05),
    power_posterior_upper = mean(posterior_p.upper <= 0.05),
    power_posterior_two_tailed = mean(posterior_p.twotailed <= 0.05)
  )

diagnosands

gg_df <-
  simulations %>%
  mutate(sim_ID = fct_reorder(factor(sim_ID), data_estimate)) %>% 
  pivot_longer(cols = starts_with(c("data", "posterior"))) %>%
  select(sim_ID, name, value) %>% 
  separate(name,
           into = c("estimator", "statistic"),
           sep = "_") %>%
  pivot_wider(
    id_cols = c(sim_ID, estimator),
    names_from = statistic,
    values_from = value
  ) %>% 
  mutate(significant = factor(significant),
         facet = if_else(estimator == "data", "Simulated OLS estimate", "Simulated posterior estimate"))

label_df <- 
  gg_df %>% 
  group_by(facet) %>% 
  summarise(power = mean(significant == 1),
            label = paste0("Power when\nPATE = 0.1\n", DeclareDesign:::format_num(power, 3)))

g <-
  ggplot(gg_df) +
  aes(estimate, sim_ID, color = significant) +
  geom_linerange(aes(xmin = conf.low, xmax = conf.high), alpha = 0.4) +
  geom_point(alpha = 0.5,
             stroke = 0,
             size = 1) +
  geom_text(
    data = label_df,
    aes(
      x = 0.035,
      y = 500,
      label = label,
      color = NULL
    ),
    hjust = 0,
    size = 4
  ) +
  scale_color_manual(values = c(gray(0.8), gray(0.2))) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    legend.position = "none"
  ) +
  geom_vline(xintercept = 0,
             linetype = "dashed",
             alpha = 0.5) +
  geom_vline(xintercept = 0.01,
             linetype = "dotted",
             alpha = 0.5) +
  labs(x = "Simulated average treatment effect estimate") +
  facet_wrap( ~ facet)
g

# ggsave("../output_for_overleaf/florida_design_analysis.pdf", g, width = 6.5, height = 4)
